library(foreign)
library(plyr)
library(xtable)
library(arm)	
library(Amelia)
library(MASS)	
library(car)
library(gmodels)
library(boot)

load('imputed_data.Rdata')
source('colnames2labels.R')
###Function to compute clustered standard errors
### Modified for glm following http://projects.iq.harvard.edu/files/gov2001/files/sesection_5.pdf
clse.f.glm <- function(dat,fm, cluster){
	require(sandwich)
	require(lmtest)
	not <- attr(fm$model,"na.action")
	if( ! is.null(not)){
		cluster <- cluster[-not]
		dat <- dat[-not,]
	}
	with(dat,{
	M <- length(unique(cluster))
	N <- length(cluster)	
	K <- fm$rank
	dfc <- (M/(M-1))*((N-1)/(N-K))
	uj <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
	vcovCL <- dfc*vcov(fm) %*% t(uj) %*% uj %*% vcov(fm)
	coeftest(fm, vcovCL)
	}
	)
}	

################### Logistic regression model for leniency ###################

uniq.panel <- appeals$uniq.panel

####DEFINE OUTCOME AND CONDITION ON APPELLANT #####

outcome <- 'lenient'
#use defendant appeals (for lenient)
appell.cond <- which(appeals$appellant1 == 1)

uniq.panel <- factor(uniq.panel[appell.cond])
for(i in c(1:n.impute)){
	a.out$imputations[[i]] <- a.out$imputations[[i]][appell.cond,]
	a.out$imputations[[i]] <- a.out$imputations[[i]][,-which(colnames(a.out$imputations[[i]]) == 'uniq.panel')]
}

form.red <- as.formula(paste(outcome, '~ narabs_J*accused_arab'))

n.model <- 4
############################# FIT MODELS ######################################
full.model.vars <- list(
c('court_Nazareth', 'court_Jerusalem','accused_female','criminal_record'),#Model 1: Basic model - just treatment, court effect, and accused covariates
#Model 2: Additional accused covariates + judge covariates
c('court_Nazareth', 'court_Jerusalem','accused_female','criminal_record', 'avg_jage','avg_jexp','female_j'),
#Model 3: Additional accused covariates + judge covariates + offense covariates
c('court_Nazareth', 'court_Jerusalem','accused_female','criminal_record', 'avg_jage','avg_jexp','female_j','offense1','offense2','offense3','offense4','victimRace2'),
#Model 4: Additional accused covariates + judge covariates + offense covariates + court process covariates
 c('court_Nazareth', 'court_Jerusalem','accused_female','criminal_record', 'avg_jage','avg_jexp','female_j','offense1','offense2','offense3','offense4','victimRace2','prequest2','methodGuilty','methodTrial'))

my.model.tables <- list()
aic.vals <- vector(length = n.model) 

#set random seed to ensure reproducibility of bootstrap standard errors
set.seed(232523)
for(j in c(1:length(full.model.vars))){
	vars.of.interest <- full.model.vars[[j]]
	form.chosen <- as.formula(paste(outcome, '~ narabs_J*accused_arab +', 
	paste(vars.of.interest, 	collapse='+')))
	
	lm.final <- llply(a.out$imputations, function(dat){glm(form.chosen, data = dat, family = binomial(link = logit))})
	#function to get covariance of interaction terms
	extract.cov <- function(my.lm){
		my.cov <- vcov(my.lm)
		my.cov[which(rownames(my.cov) == 'narabs_J'),
		which(colnames(my.cov) == 'narabs_J:accused_arab')]
	}
	#function to get marginal effects
	get.margins <- function(dat, idx){
		boot.lm <- glm(form.chosen, data = dat[idx,], family = binomial(link = logit))
		my.mm <- model.matrix(lm.final[[i]])
		
		coeff.arab <- coef(boot.lm)[which(names(coef(boot.lm)) == 'accused_arab')]
		coeff.mix <- coef(boot.lm)[which(names(coef(boot.lm)) == 'narabs_J')]
		coeff.mix.arab <- coef(boot.lm)[which(names(coef(boot.lm)) == 'narabs_J:accused_arab')]
		
		#go through the rows of the model matrix
		#for each row, calculate a marginal effect.  If the row is a Jew, add it to vector storing effects for Jews; if it is an Arab, store it in vector storing effects for Arabs
		marg.eff.arab <- vector()
		marg.eff.jew <- vector()
		for(i in c(1:nrow(my.mm))){
			#compute overall impact of other coefficients
			main.coeffs <- which(colnames(my.mm) %in% c('accused_arab','narabs_J','narabs_J:accused_arab') | is.na(coef(boot.lm))) #also include NA coefs			
			other.coeffs <- sum(my.mm[i,-main.coeffs]*coef(boot.lm)[-main.coeffs])
			
			is.arab <- my.mm[i,which(colnames(my.mm) == 'accused_arab')]
			marg.eff <- 1/(1 + exp(-(other.coeffs + coeff.arab*is.arab + coeff.mix + coeff.mix.arab*is.arab))) - 1/(1 + exp(-(other.coeffs + coeff.arab*is.arab)))
			
			if(is.arab){
				marg.eff.arab <- append(marg.eff.arab, marg.eff)
			}else{
				marg.eff.jew <- append(marg.eff.jew, marg.eff)
			}	
		}
		return(c(mean(marg.eff.jew), mean(marg.eff.arab)))	
	}

	#pull needed estimates out of model
	lm.tables <- list()
	for(i in c(1:n.impute)){
		#clustered errors
		coef.table <- clse.f.glm(a.out$imputations[[i]], lm.final[[i]], uniq.panel)

		#add AIC info
		aic.table <- cbind(c( nrow(coef.table) - 1,lm.final[[i]]$aic, nrow(lm.final[[i]]$data) ), matrix(NA, nrow = 3, ncol = 3))
		
		rownames(aic.table) <- c('N. Predictors','AIC Value','Observations')
		
		#add marginal effects
		marg.eff.star <- boot(a.out$imputations[[i]], get.margins, R = 2000)
		marg.eff.jew <- c(marg.eff.star$t0[1], sd(marg.eff.star$t[,1]), NA, NA) #add t- and p-values later
		marg.eff.arab <- c(marg.eff.star$t0[2], sd(marg.eff.star$t[,2]), NA, NA) #add t- and p-values later
		
		#add marginal effect rows to coef table for easy recombination of amelia estimates
		lm.tables[[i]] <- rbind(coef.table, aic.table,
			marg.eff.jew,
			marg.eff.arab)
	}

	#combine amelia estimates
	coeffs.mat <- laply(lm.tables, function(x){x[,1]})
	stderr.mat <- laply(lm.tables, function(x){x[,2]})
	regr.df <- lm.final[[i]]$df.residual #save degrees of freedom for use in computing p-values later
	meld.out <- mi.meld(coeffs.mat, stderr.mat)

	final.table <- as.data.frame(cbind(t(meld.out$q.mi), t(meld.out$se.mi)))

	colnames(final.table) <- c('Estimate','Std.Error')
	final.table$t.value <- final.table$Estimate/final.table$Std.Error
	final.table$p.value <- 2 - 2*pt(abs(final.table$t.value), df = regr.df)
	nr <- nrow(final.table)
	my.model.tables[[j]] <- final.table
}



table.sections <- llply(my.model.tables, function(x){x[,c(1,2,4)]})
all.rownames <- unique(unlist(llply(table.sections, rownames)))
colname.stubs <- c('Est','SE','Pval')
all.colnames <- paste(colname.stubs, as.vector(t(matrix(rep(c(1:n.model),3), ncol = 3))), sep = '_')
models.tab <- matrix(0, ncol = length(all.colnames), nrow = length(all.rownames), dimnames = list(all.rownames, all.colnames))

expand.tab <- function(source.tab,target.names){
	row.idx <- match(rownames(source.tab), target.names)
	out.mat <- matrix(NA,ncol = ncol(source.tab), nrow = length(target.names))
	out.mat[row.idx,] <- as.matrix(source.tab)
	return(out.mat)
}
for(i in c(1:length(table.sections))){
	models.tab[,c(3*i-2,3*i - 1, 3*i)] <- expand.tab(table.sections[[i]],rownames(models.tab))
}

var.counts <- laply(table.sections, nrow)
aic.info <- rbind(var.counts - 3, aic.vals) #subtract intercept, marg.effects
rownames(aic.info) <- c('Number of Predictors','AIC Value')
colnames(aic.info) <- paste('Model', c(1:n.model))

############################# MAKE TABLE 5  ######################################
#build marginal effects table
marg.effect.tab <- rbind(
	models.tab[which(rownames(models.tab) == 'marg.eff.jew'),],
	models.tab[which(rownames(models.tab) == 'marg.eff.arab'),])
marg.effect.tab <- marg.effect.tab[,-3*(c(1:n.model))] #drop p.vals
rownames(marg.effect.tab) <- c('Marginal Effect for Jewish Accused', 'Marginal Effect for Arab Accused')


#clean up main table
latex.tab <- round(models.tab,3)
#rearrange rows so interesting coefficients are at top
aa.idx <- which(rownames(latex.tab) == 'accused_arab')
nJ.idx <- which(rownames(latex.tab) == 'narabs_J')
nJ.aa.idx <- which(rownames(latex.tab) == 'narabs_J:accused_arab')
int.idx <- which(rownames(latex.tab) == '(Intercept)')
n.pred.idx <- which(rownames(latex.tab) == 'N. Predictors')
aic.idx <- which(rownames(latex.tab) == 'AIC Value')
obs.idx <- which(rownames(latex.tab) == 'Observations')
mj.idx <- which(rownames(latex.tab) == 'marg.eff.jew')
ma.idx <- which(rownames(latex.tab) == 'marg.eff.arab')
new.order <- c(aa.idx,nJ.idx, nJ.aa.idx, c(1:nrow(latex.tab))[-c(nJ.idx, nJ.aa.idx, aa.idx, int.idx, n.pred.idx, aic.idx, obs.idx, mj.idx, ma.idx)], int.idx, n.pred.idx, aic.idx, obs.idx, mj.idx, ma.idx)
latex.tab <- latex.tab[new.order,]
latex.tab[is.na(latex.tab)] <- ''

#add parentheses around standard errors
stderr.idx <- grep('SE', x = colnames(latex.tab))
latex.tab[,stderr.idx] <- apply(latex.tab[,stderr.idx], 2, function(x){paste('(',x,')',sep = '')})

latex.tab[latex.tab == '()'] <- ''

print("TABLE 5")
print(xtable(names2labels(latex.tab, col.or.row = 'row')))

############################# MAKE FIGURE 1 (part 1) ######################################
stderr.idx <- 2*c(1:(ncol(marg.effect.tab)/2))
eff.idx <- stderr.idx - 1

N <- ncol(marg.effect.tab)/2
m.effects <- c(marg.effect.tab[1,eff.idx], marg.effect.tab[2,eff.idx])
discrep <- c(marg.effect.tab[1,stderr.idx], marg.effect.tab[2, stderr.idx])*1.96
m.effects.up <- m.effects + discrep
m.effects.dwn <- m.effects - discrep
ylims <- c( min(-.1, min(m.effects.dwn, na.rm = TRUE)), max(.1, max(m.effects.up, na.rm = TRUE)))

plot(rep(c(1:N),2) + c(rep(-0.1,N),rep(0.1,N)), m.effects, ylim = ylims, main =paste('Prob. of',outcome,'outcome'),
	 xlab = 'Model Number', ylab = 'Change in Predicted Probability for Mixed Panel', 
	pch = 20, xaxt = 'n')
axis(1, at = 1:4)
for(i in rep(c(1:N))){
	lines(rep(i,2) - 0.1,c(m.effects.up[i], m.effects.dwn[i]), lty = 'dotted')
	lines(rep(i,2) + 0.1,c(m.effects.up[i + N], m.effects.dwn[i + N]), lty = 'solid')
}
abline(h = 0, lty = 'dotted')
legend('bottomright', c('Jewish Accused','Arab Accused'), lty = c('dotted','solid'))



####################### MAKE TABLE 13 ####################################
#contingency tables, marginal-effect style

	cont.tab <- list()
	for(j in c(1:n.impute)){

		boot.lm <- lm.final[[j]]
		my.mm <- model.matrix(lm.final[[j]])

		coeff.arab <- coef(boot.lm)[which(names(coef(boot.lm)) == 'accused_arab')]
		coeff.mix <- coef(boot.lm)[which(names(coef(boot.lm)) == 'narabs_J')]
		coeff.mix.arab <- coef(boot.lm)[which(names(coef(boot.lm)) == 'narabs_J:accused_arab')]

		pred.mix.jew <- vector()
		pred.nomix.jew <- vector()
		pred.nomix.arab <- vector()
		pred.mix.arab <- vector()

		for(i in c(1:nrow(my.mm))){
			#compute overall impact of other coefficients
			main.coeffs <- which(colnames(my.mm) %in% c('accused_arab','narabs_J','narabs_J:accused_arab') | is.na(coef(boot.lm))) #also include NA coefs			
			other.coeffs <- sum(my.mm[i,-main.coeffs]*coef(boot.lm)[-main.coeffs])
			
			#is.mix <- my.mm[i,which(colnames(my.mm) == 'narabs_J')]
			is.arab <- my.mm[i,which(colnames(my.mm) == 'accused_arab')]
			pred.prob.mix <- 1/(1 + exp(-(other.coeffs + coeff.arab*is.arab + coeff.mix + coeff.mix.arab*is.arab)))
			pred.prob.nomix <- 1/(1 + exp(-(other.coeffs + coeff.arab*is.arab)))
			
			if(is.arab){
				pred.mix.arab <- append(pred.mix.arab,pred.prob.mix)
				pred.nomix.arab <- append(pred.nomix.arab,pred.prob.nomix)
			}else{
				pred.mix.jew <- append(pred.mix.jew, pred.prob.mix)			
				pred.nomix.jew <- append(pred.nomix.jew, pred.prob.nomix)			
			}
				
		}
		cont.tab[[j]] <- c(mean(pred.nomix.jew),mean(pred.mix.jew), mean(pred.nomix.arab), mean(pred.mix.arab))
	}
	meld.out <- mi.meld(laply(cont.tab, function(x)x), matrix(NA, ncol = 4, nrow = length(cont.tab)))
	out.tab <- matrix(meld.out$q.mi, nrow = 2, ncol =2)
	dimnames(out.tab) <- list(c('All-Jewish Panel','Mixed Panel'),c('Jew','Arab'))
	print("TABLE 13")
	print(xtable(out.tab, digits = 2))